Import points which represent 1km grid centroids of a grid covering London
grid <- read.csv('X:/Projects/LAEI2013_TfL_Strategy/z_GridsForJames.csv')
grid <- grid[,c('GridIdEx', 'Easting', 'Northing')]
names(grid) <- c('grid_exact_cut', 'easting', 'northing')
This gives a table of 3355 rows, show below.
| grid_exact_cut | easting | northing |
|---|---|---|
| 32 | 517500 | 201500 |
| 33 | 518500 | 201500 |
| 34 | 519500 | 201500 |
| 35 | 520500 | 201500 |
| 36 | 521500 | 201500 |
| 37 | 543500 | 201500 |
Now import traffic flows by toid and grid exact cut, and edit/clean to get what we need
traffic_flows <- read.csv('X:/Projects/LAEI2013_Gdrive/TrafficFlows/Major_WithAdjustedIbus_AADT/z_AADT_ExactCut_WithIbus_2013.csv')
names(traffic_flows) <- tolower(names(traffic_flows))
drop <- c('year', 'location_exactcut', 'boroughname_exactcut', 'tlrn', 'irr', 'motorwaynumber', 'petrolcar', 'dieselcar', 'electriccar', 'petrollgv', 'diesellgv', 'electriclgv', 'ltbus', 'coach')
traffic_flows <- traffic_flows[, !(names(traffic_flows) %in% drop)]
traffic_flows$toid <- as.character(traffic_flows$toid)
traffic_flows$total_vehicles <- (traffic_flows$motorcycle + traffic_flows$taxi + traffic_flows$car + traffic_flows$busandcoach + traffic_flows$lgv + traffic_flows$rigid2axle + traffic_flows$rigid3axle + traffic_flows$rigid4axle + traffic_flows$artic3axle + traffic_flows$artic5axle + traffic_flows$artic6axle)
traffic_flows$total_light <- (traffic_flows$motorcycle + traffic_flows$taxi + traffic_flows$car)
traffic_flows$total_heavy <- (traffic_flows$busandcoach + traffic_flows$lgv + traffic_flows$rigid2axle + traffic_flows$rigid3axle + traffic_flows$rigid4axle + traffic_flows$artic3axle + traffic_flows$artic5axle + traffic_flows$artic6axle)
drop <- c('motorcycle', 'taxi', 'car', 'busandcoach', 'lgv', 'rigid2axle', 'rigid3axle', 'rigid4axle', 'artic3axle', 'artic5axle', 'artic6axle')
traffic_flows <- traffic_flows[, !(names(traffic_flows) %in% drop)]
This gives a table of 87996 rows, show below.
| toid | grid_exactcut_id | dotref | id | speed | total_vehicles | total_light | total_heavy |
|---|---|---|---|---|---|---|---|
| 4000000030455238 | 2739 | 26145 | 0 | 48.92023 | 28315.654 | 23554.666 | 4760.9877 |
| 4000000030455245 | 2738 | 8468 | 0 | 59.23806 | 85580.440 | 68722.520 | 16857.9195 |
| 4000000030455249 | 2739 | 8468 | 0 | 13.03565 | 85580.440 | 68722.520 | 16857.9195 |
| 4000000030455250 | 2738 | 8468 | 0 | 27.10584 | 85580.440 | 68722.520 | 16857.9195 |
| 4000000030455254 | 818 | 0 | 13633 | 13.94029 | 4395.482 | 3897.315 | 498.1674 |
| 4000000030455271 | 2160 | 999957 | 0 | 30.18082 | 15938.228 | 13188.405 | 2749.8227 |
road_info <- read.csv('erg_roads_info.csv')
names(road_info) <- tolower(names(road_info))
road_info <- road_info[,c('toid', 'desc_term')]
road_info$toid <- as.character(road_info$toid)
Import emissions by toid and grid exact cut
emissions <- read.csv('X:/Projects/LAEI2013_TfL_Strategy/FilesForHerald4/EmissionsByLink/INPUT_RESULTS_LAEI_ExactCutIntersect_Major_2013_LAEI_V117.csv')
names(emissions) <- tolower(names(emissions))
drop <- c('emissions', 'year', 'petrolcar', 'dieselcar', 'petrollgv', 'diesellgv', 'ltbus', 'coach', 'electriccar', 'electriclgv')
emissions <- emissions[, !(names(emissions) %in% drop)]
emissions$total_emissions <- emissions$motorcycle + emissions$taxi + emissions$car + emissions$busandcoach + emissions$lgv + emissions$rigid + emissions$artic + emissions$rigid2axle + emissions$rigid3axle + emissions$rigid4axle + emissions$artic3axle + emissions$artic5axle + emissions$artic6axle
emissions$toid <- as.character(emissions$toid)
drop <- c('motorcycle', 'taxi', 'car', 'busandcoach', 'lgv', 'rigid', 'artic', 'rigid2axle', 'rigid3axle', 'rigid4axle', 'artic3axle', 'artic5axle', 'artic6axle')
emissions <- emissions[, !(names(emissions) %in% drop)]
emissions <- emissions[!(emissions$pollutant %in% c('TB_PM10_SW', 'TB_PM25_SW')),]
Not going to need all the constiuent parts of the emissions, just the total by each. So aggregate up.
emissions$pollutant <- as.character(emissions$pollutant)
emissions[grep('pm25', tolower(emissions$pollutant)),'pollutant'] <- 'pm25'
emissions[grep('pm10', tolower(emissions$pollutant)),'pollutant'] <- 'pm10'
emissions$gridid <- as.character(emissions$gridid)
emissions$grid_exactcut_id <- as.character(emissions$grid_exactcut_id)
emissions$dotref <- as.character(emissions$dotref)
emissions$pollutant <- tolower(emissions$pollutant)
emissions <- aggregate(total_emissions ~ gridid + toid + grid_exactcut_id + dotref + length + pollutant, data=emissions, FUN=sum)
This gives a table of 205496 rows, show below.
| gridid | toid | grid_exactcut_id | dotref | length | pollutant | total_emissions |
|---|---|---|---|---|---|---|
| 6772 | 4000000030098250 | 106 | 16001 | 0 | no2 | 0 |
| 6772 | 4000000030098253 | 106 | 16001 | 0 | no2 | 0 |
| 6772 | 4000000030413089 | 106 | 16001 | 0 | no2 | 0 |
| 6772 | 4000000030413092 | 106 | 16001 | 0 | no2 | 0 |
| 6083 | 4000000027909026 | 12 | 16001 | 0 | no2 | 0 |
| 6083 | 4000000027909034 | 12 | 16001 | 0 | no2 | 0 |
traffic_flows <- merge(traffic_flows, road_info, by.x = 'toid', by.y = 'toid')
rm(road_info)
grid_traffic <- merge(traffic_flows, grid, by.x = c('grid_exactcut_id'), by.y = c('grid_exact_cut'))
rm(traffic_flows)
grid_traffic <- grid_traffic[order(grid_traffic$toid),]
Looks like this
| grid_exactcut_id | toid | dotref | id | speed | total_vehicles | total_light | total_heavy | desc_term | easting | northing | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 421 | 27 | 4000000027865913 | 16001 | 0 | 98.99142 | 88097.16 | 72202.86 | 15894.29 | Motorway | 512500 | 201500 |
| 647 | 45 | 4000000027865913 | 16001 | 0 | 98.99142 | 88097.16 | 72202.86 | 15894.29 | Motorway | 512500 | 200500 |
| 1138 | 74 | 4000000027865913 | 16001 | 0 | 98.99142 | 88097.16 | 72202.86 | 15894.29 | Motorway | 512500 | 199500 |
| 1744 | 106 | 4000000027865913 | 16001 | 0 | 98.99142 | 88097.16 | 72202.86 | 15894.29 | Motorway | 512500 | 198500 |
| 423 | 27 | 4000000027865914 | 16001 | 0 | 103.32334 | 88097.16 | 72202.86 | 15894.29 | Motorway | 512500 | 201500 |
| 646 | 45 | 4000000027865914 | 16001 | 0 | 103.32334 | 88097.16 | 72202.86 | 15894.29 | Motorway | 512500 | 200500 |
Now take the mean traffic flow, on each toid, by grid square (using easting and northing)
grid_traffic <- aggregate(data = grid_traffic, FUN=mean, cbind(speed, total_vehicles, total_light, total_heavy)~ easting+northing+toid+desc_term)
Now looks like this
| easting | northing | toid | desc_term | speed | total_vehicles | total_light | total_heavy | |
|---|---|---|---|---|---|---|---|---|
| 83724 | 512500 | 198500 | 4000000027865913 | Motorway | 98.99142 | 88097.16 | 72202.86 | 15894.29 |
| 83725 | 512500 | 199500 | 4000000027865913 | Motorway | 98.99142 | 88097.16 | 72202.86 | 15894.29 |
| 83726 | 512500 | 200500 | 4000000027865913 | Motorway | 98.99142 | 88097.16 | 72202.86 | 15894.29 |
| 83727 | 512500 | 201500 | 4000000027865913 | Motorway | 98.99142 | 88097.16 | 72202.86 | 15894.29 |
| 83728 | 512500 | 198500 | 4000000027865914 | Motorway | 103.32334 | 88097.16 | 72202.86 | 15894.29 |
| 83729 | 512500 | 199500 | 4000000027865914 | Motorway | 103.32334 | 88097.16 | 72202.86 | 15894.29 |
emissions_grids <- merge(emissions, grid, by.x = 'grid_exactcut_id', by.y = 'grid_exact_cut')
rm(emissions)
emissions_grids <- emissions_grids[order(emissions_grids$toid),]
all_data <- merge(emissions_grids, grid_traffic, by.x = c('toid', 'easting', 'northing'), by.y = c('toid', 'easting', 'northing'))
rm(grid, grid_traffic, emissions_grids,drop)
Example of final data for NO2 and one grid
| toid | easting | northing | grid_exactcut_id | gridid | dotref | length | pollutant | total_emissions | desc_term | speed | total_vehicles | total_light | total_heavy | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 4000000027865913 | 512500 | 198500 | 106 | 6772 | 16001 | 51.510888 | no2 | 0.2727828 | Motorway | 98.99142 | 88097.16 | 72202.86 | 15894.294 |
| 20 | 4000000027865914 | 512500 | 198500 | 106 | 6772 | 16001 | 54.546206 | no2 | 0.2996166 | Motorway | 103.32334 | 88097.16 | 72202.86 | 15894.294 |
| 14865 | 4000000030098250 | 512500 | 198500 | 106 | 6772 | 16001 | 0.000000 | no2 | 0.0000000 | Motorway | 66.71721 | 88097.16 | 72202.86 | 15894.294 |
| 14869 | 4000000030098251 | 512500 | 198500 | 106 | 6772 | 16001 | 188.781649 | no2 | 1.0677528 | Motorway | 105.79689 | 88097.16 | 72202.86 | 15894.294 |
| 14875 | 4000000030098252 | 512500 | 198500 | 106 | 6772 | 16001 | 185.253756 | no2 | 1.0116819 | Motorway | 102.87508 | 88097.16 | 72202.86 | 15894.294 |
| 14877 | 4000000030098253 | 512500 | 198500 | 106 | 6772 | 16001 | 0.000000 | no2 | 0.0000000 | Motorway | 29.00185 | 88097.16 | 72202.86 | 15894.294 |
| 14881 | 4000000030098254 | 512500 | 198500 | 106 | 6772 | 26461 | 0.000000 | no2 | 0.0000000 | A Road | 60.32731 | 33932.38 | 29294.97 | 4637.408 |
| 14888 | 4000000030098255 | 512500 | 198500 | 106 | 6772 | 26461 | 144.506411 | no2 | 0.2242166 | A Road | 73.37013 | 33932.38 | 29294.97 | 4637.408 |
| 14891 | 4000000030098256 | 512500 | 198500 | 106 | 6772 | 26461 | 147.787159 | no2 | 0.2247083 | A Road | 81.29693 | 33932.38 | 29294.97 | 4637.408 |
| 14895 | 4000000030098257 | 512500 | 198500 | 106 | 6772 | 26461 | 0.000000 | no2 | 0.0000000 | A Road | 26.40186 | 33932.38 | 29294.97 | 4637.408 |
| 14897 | 4000000030098264 | 512500 | 198500 | 106 | 6772 | 16001 | 152.961181 | no2 | 0.8243357 | Motorway | 101.59566 | 88097.16 | 72202.86 | 15894.294 |
| 14904 | 4000000030098265 | 512500 | 198500 | 106 | 6772 | 16001 | 151.508332 | no2 | 0.7772227 | Motorway | 96.02209 | 88097.16 | 72202.86 | 15894.294 |
| 14907 | 4000000030098266 | 512500 | 198500 | 106 | 6772 | 26461 | 221.933185 | no2 | 0.4558797 | A Road | 34.06254 | 33932.38 | 29294.97 | 4637.408 |
| 14918 | 4000000030098273 | 512500 | 198500 | 106 | 6772 | 36001 | 35.607109 | no2 | 0.2035174 | Motorway | 103.17324 | 88111.81 | 70593.34 | 17518.467 |
| 14927 | 4000000030098274 | 512500 | 198500 | 106 | 6772 | 36001 | 0.000000 | no2 | 0.0000000 | Motorway | 76.95585 | 88111.81 | 70593.34 | 17518.467 |
| 28980 | 4000000030141133 | 512500 | 198500 | 106 | 6772 | 26461 | 182.812383 | no2 | 0.2738319 | A Road | 71.60664 | 33932.38 | 29294.97 | 4637.408 |
| 28984 | 4000000030141138 | 512500 | 198500 | 106 | 6772 | 26461 | 130.948457 | no2 | 0.2826997 | A Road | 34.74561 | 33932.38 | 29294.97 | 4637.408 |
| 28992 | 4000000030141139 | 512500 | 198500 | 106 | 6772 | 26461 | 104.652015 | no2 | 0.1726599 | A Road | 51.89077 | 33932.38 | 29294.97 | 4637.408 |
| 29016 | 4000000030141186 | 512500 | 198500 | 106 | 6772 | 36001 | 0.000000 | no2 | 0.0000000 | Motorway | 36.06457 | 88111.81 | 70593.34 | 17518.467 |
| 29023 | 4000000030141187 | 512500 | 198500 | 106 | 6772 | 36001 | 42.712267 | no2 | 0.2547503 | Motorway | 106.96976 | 88111.81 | 70593.34 | 17518.467 |
| 29036 | 4000000030141188 | 512500 | 198500 | 106 | 6772 | 6456 | 0.000000 | no2 | 0.0000000 | A Road | 21.84710 | 46901.22 | 41064.90 | 5836.322 |
| 29047 | 4000000030141189 | 512500 | 198500 | 106 | 6772 | 6456 | 61.520737 | no2 | 0.1216399 | A Road | 81.21151 | 46901.22 | 41064.90 | 5836.322 |
| 29060 | 4000000030141190 | 512500 | 198500 | 106 | 6772 | 6456 | 67.537773 | no2 | 0.1320438 | A Road | 76.09839 | 46901.22 | 41064.90 | 5836.322 |
| 29072 | 4000000030141191 | 512500 | 198500 | 106 | 6772 | 6456 | 0.000000 | no2 | 0.0000000 | A Road | 51.13605 | 46901.22 | 41064.90 | 5836.322 |
| 55288 | 4000000030227179 | 512500 | 198500 | 106 | 6772 | 26461 | 65.764735 | no2 | 0.1003584 | A Road | 79.59250 | 33932.38 | 29294.97 | 4637.408 |
| 55290 | 4000000030227180 | 512500 | 198500 | 106 | 6772 | 26461 | 65.408333 | no2 | 0.0997009 | A Road | 82.01272 | 33932.38 | 29294.97 | 4637.408 |
| 55300 | 4000000030227181 | 512500 | 198500 | 106 | 6772 | 26461 | 2.826270 | no2 | 0.0066291 | A Road | 30.47028 | 33932.38 | 29294.97 | 4637.408 |
| 55301 | 4000000030227190 | 512500 | 198500 | 106 | 6772 | 26461 | 50.609063 | no2 | 0.0831328 | A Road | 51.06479 | 33932.38 | 29294.97 | 4637.408 |
| 55318 | 4000000030227243 | 512500 | 198500 | 106 | 6772 | 36001 | 63.913122 | no2 | 0.3796453 | Motorway | 106.99156 | 88111.81 | 70593.34 | 17518.467 |
| 55322 | 4000000030227244 | 512500 | 198500 | 106 | 6772 | 36001 | 62.850492 | no2 | 0.3587555 | Motorway | 103.22376 | 88111.81 | 70593.34 | 17518.467 |
| 91746 | 4000000030333096 | 512500 | 198500 | 106 | 6772 | 26461 | 33.276764 | no2 | 0.1065320 | A Road | 12.54503 | 33932.38 | 29294.97 | 4637.408 |
| 125577 | 4000000030413089 | 512500 | 198500 | 106 | 6772 | 16001 | 0.000000 | no2 | 0.0000000 | Motorway | 87.21668 | 88097.16 | 72202.86 | 15894.294 |
| 125583 | 4000000030413090 | 512500 | 198500 | 106 | 6772 | 16001 | 11.496550 | no2 | 0.0648114 | Motorway | 105.77143 | 88097.16 | 72202.86 | 15894.294 |
| 125586 | 4000000030413091 | 512500 | 198500 | 106 | 6772 | 16001 | 11.741428 | no2 | 0.0640695 | Motorway | 102.86827 | 88097.16 | 72202.86 | 15894.294 |
| 125589 | 4000000030413092 | 512500 | 198500 | 106 | 6772 | 16001 | 0.000000 | no2 | 0.0000000 | Motorway | 77.77912 | 88097.16 | 72202.86 | 15894.294 |
| 125596 | 4000000030413098 | 512500 | 198500 | 106 | 6772 | 26461 | 0.000000 | no2 | 0.0000000 | A Road | 77.12659 | 33932.38 | 29294.97 | 4637.408 |
| 148396 | 4000000030456485 | 512500 | 198500 | 106 | 6772 | 26461 | 7.071063 | no2 | 0.0107419 | A Road | 81.16264 | 33932.38 | 29294.97 | 4637.408 |
| 159078 | 4000000030474524 | 512500 | 198500 | 106 | 6772 | 26461 | 3.434188 | no2 | 0.0054304 | A Road | 63.76062 | 33932.38 | 29294.97 | 4637.408 |
| 168594 | 4000000030757876 | 512500 | 198500 | 106 | 6772 | 26461 | 18.034695 | no2 | 0.0287095 | A Road | 63.86696 | 33932.38 | 29294.97 | 4637.408 |
| 200373 | 4000000031239134 | 512500 | 198500 | 106 | 6772 | 26461 | 4.869538 | no2 | 0.0095019 | A Road | 36.15354 | 33932.38 | 29294.97 | 4637.408 |
| 200391 | 4000000031239138 | 512500 | 198500 | 106 | 6772 | 26461 | 5.024591 | no2 | 0.0097761 | A Road | 35.98589 | 33932.38 | 29294.97 | 4637.408 |
First sum emissions by the 1km, to 1km grid, and create raster stacks from them
ukgrid = "+init=epsg:27700"
one_km_emissions <- aggregate(data = all_data, total_emissions ~ pollutant + easting + northing, FUN=sum)
sources <- as.character((unique(one_km_emissions$pollutant)))
for (i in 1:length(sources)) {
temp <- one_km_emissions[one_km_emissions$pollutant == sources[i],]
coordinates(temp) <- ~ easting + northing
temp$pollutant <- NULL
gridded(temp) <- TRUE
proj4string(temp) <- CRS(ukgrid)
temp <- raster(temp)
names(temp) <- tolower(sources[i])
if (i == 1) { one_km_emissions_raster <- temp} else {one_km_emissions_raster <- stack(one_km_emissions_raster, temp)}
}
NO2 1km plot
Now make a 10km plot
ten_km_emissions_raster <- aggregate(one_km_emissions_raster, fact = 10, fun=sum)
rm(one_km_emissions, one_km_emissions_raster, temp, i)
NO2 10km plot
Now need to think about how these concentrations at 10km level, relate to the road link that contributed them, and get the relationship between them. First convert the raster to polygons.
ten_km_emissions_polygons <- rasterToPolygons(ten_km_emissions_raster)
NO2 10km polygons plot
Turn the originl data into points, so that can use these to extract the values from the 10km polygons
coordinates(all_data) <- ~ easting + northing
proj4string(all_data) <- CRS(ukgrid)
This code now looks at each road, sees which overall 10km grid it is in, and then divides each pollutant by the total of that pollutant in the square. Puts the result into ‘ten_km_contribution’ variable.
result <- point.in.poly(all_data, ten_km_emissions_polygons)
result <- data.frame(result)
result$ten_km_contribution <- NA
result[result$pollutant == 'no2',]$ten_km_contribution <- result[result$pollutant == 'no2',]$total_emissions / result[result$pollutant == 'no2',]$no2
result[result$pollutant == 'nox',]$ten_km_contribution <- result[result$pollutant == 'nox',]$total_emissions / result[result$pollutant == 'nox',]$nox
result[result$pollutant == 'pm10',]$ten_km_contribution <- result[result$pollutant == 'pm10',]$total_emissions / result[result$pollutant == 'pm10',]$pm10
result[result$pollutant == 'pm25',]$ten_km_contribution <- result[result$pollutant == 'pm25',]$total_emissions / result[result$pollutant == 'pm25',]$pm25
rm(all_data)
Some toid exact cuts (9616) have zero emissions, remove those from the total of 205496.
result <- result[result$total_emissions > 0,]
Now before create a model to calculate a regression between the variables and the emissions, aggregate from TOID exact cuts, up to DotRef. This means that we go from 48970 ‘streets’ …..
result <- merge(aggregate(data = result, cbind(length, total_emissions, ten_km_contribution) ~ dotref + desc_term + pollutant, sum),
aggregate(data = result, cbind(speed, total_vehicles, total_light, total_heavy) ~ dotref + desc_term + pollutant, mean),
by = c('dotref', 'pollutant'))
#test <- merge(aggregate(data = result, cbind(length, total_emissions, ten_km_contribution) ~ dotref + pollutant, sum),
# aggregate(data = result, cbind(speed, total_vehicles, total_light, total_heavy) ~ dotref + pollutant, mean),
# by = c('dotref', 'pollutant'))
… down to 2990.
ggplot(result, aes(ten_km_contribution)) +
geom_histogram(bins = 60, aes(fill=pollutant)) +
facet_wrap(.~pollutant, scales = 'free') +
xlab('Contribution to 10km emissions')
Take 80% of the data as a training data set
training_index <- sample(nrow(result), round(nrow(result) * 0.8,0))
training_data <- result[training_index,]
Seperate the other 20% as a testing data set
testing_data <- result[-training_index,]
Make a liner regression model using the training data
model_results <- data.frame(pollutant = NA,
intercept = as.numeric(''),
length_coef = as.numeric(''),
speed_coef = as.numeric(''),
total_light_coef = as.numeric(''),
total_heavy_coef = as.numeric(''),
stringsAsFactors = F)
model_results_list <- list()
for (i in 1:length(sources)) {
model <- lm(ten_km_contribution ~ length + speed + total_light + total_heavy,
data=training_data[training_data$pollutant == sources[i],])
model_results_list[[i]] <- model
model_results[i,] <- c(sources[i], as.numeric(coef(model)[1]), as.numeric(coef(model)[2]), as.numeric(coef(model)[3]), as.numeric(coef(model)[4]), as.numeric(coef(model)[5]))
rm(model)
}
model_results$intercept <- as.numeric(model_results$intercept)
model_results$length_coef <- as.numeric(model_results$length_coef)
model_results$speed_coef <- as.numeric(model_results$speed_coef)
model_results$total_light_coef <- as.numeric(model_results$total_light_coef)
model_results$total_heavy_coef <- as.numeric(model_results$total_heavy_coef)
Now use the regression equation on the testing 20% data
testing_data$prediction <- NA
for (i in 1:length(sources)) {
testing_data[testing_data$pollutant == sources[i],'prediction'] <- model_results[model_results$pollutant == sources[i],]$intercept+(
(testing_data[testing_data$pollutant == sources[i],]$length * model_results[model_results$pollutant == sources[i],]$length_coef) +
(testing_data[testing_data$pollutant == sources[i],]$speed * model_results[model_results$pollutant == sources[i],]$speed_coef) +
(testing_data[testing_data$pollutant == sources[i],]$total_light * model_results[model_results$pollutant == sources[i],]$total_light_coef) +
(testing_data[testing_data$pollutant == sources[i],]$total_heavy * model_results[model_results$pollutant == sources[i],]$total_heavy_coef))
}
Plot the prediction against the actuals for the testing 20%